#########################################################################################
##################################  SIMULATION 1   ######################################
#########################################################################################
#########################################################################################

# Run this simulation to generate data for tables and figures.


library(pacman)
p_load(tidyverse, msm, sandwich, readxl)
set.seed(12345)
rm(list = ls())

setwd('C:/Users/ganth/Dropbox/StrengthInNumbersReplicationPackage/replicable/simulations')

#########################################################################################

# begin simulation
# set up accounting
F1<-c('F','M','M','M','M')
F3<-c('F','F','F','M','M')

gender <- c(rep(F1, 39), rep(F3, 36))
index <- 1:length(gender)
study <- 1
groupindex <- paste0("ACCT",round(ceiling(index/5), 0))
condition <- c(rep('F1', 39*5), rep('F3', 36*5))

df.main <- as.data.frame(cbind(index, groupindex, study, gender, condition))


# set up AH
F1<-c('F','F','M','M','M','M')
F3<-c('F','F','F','F','M','M')

gender <- c(rep(F1, 63), rep(F3, 60))
index <- 1:length(gender)
study <- 2
groupindex <- paste0("AH", round(ceiling(index/6), 0))
condition <- c(rep('F1', 63*6), rep('F3', 60*6))

df.main <- rbind(df.main, as.data.frame(cbind(index, groupindex, study, gender, condition)))
# all set up




# function to start here
simulateEfx <- function(selfpnlty){
  library(dplyr)
  
  out.matrix <- as.data.frame(matrix(ncol = 5, nrow = reps))
  names(out.matrix) <- c('FSelfvoteRatioBest1F', 'FSelfvoteRatioBest3F', 'MSelfvoteRatioBest1F', 'MSelfvoteRatioBest3F', 'selfdiscrimSD')
  
  # loop would start here
  for (i in 1:reps) {
    df <- df.main
    # pure performance
    performance <- as.numeric(rnorm(nrow(df.main), mean = 0, sd = 1))
    df$performance <- performance
    df <- df %>% group_by(groupindex) %>% mutate(performanceRank = rank(performance))
    
    # performance with penalty for women
    df <- df %>% mutate(performancePenalized = case_when(gender=="F" ~ performance, # - pnlty,
                                                         gender=="M" ~ performance,
                                                         TRUE ~ 999))
    
    df <- df %>% group_by(groupindex) %>% mutate(performancePenalizedRank = rank(performancePenalized))
    
    df <- df %>% mutate(performancePenalizedForSelf = case_when(gender=="F" ~ performance - selfpnlty,
                                                                gender=="M" ~ performance,
                                                                TRUE ~ 999))
    df <- df %>% group_by(groupindex) %>% mutate(performancePenalizedForSelfRank = rank(performancePenalizedForSelf))
    
    # simulate vote for best (rank = 5 or 6) or worst (rank = 1) person
    bestNondiscrimByGroup <- df %>% filter((performanceRank==5 & study==1) |
                                             (performanceRank==6 & study==2)) %>%
      select(groupindex, gender, index, performance) %>% rename(genderBest = gender, indexBest = index, performanceBest = performance)
    
    # simulate vote for second best (rank = 4 or 5)
    secondBestNondiscrimByGroup <- df %>% filter((performanceRank==4 & study==1) |
                                                   (performanceRank==5 & study==2)) %>%
      select(groupindex, gender, performance) %>% rename(genderSecondBest = gender, performanceSecondBest = performance)
    
    # simulate vote for best (rank = 5 or 6) or worst (rank = 1) person after penalty
    bestWdiscrimByGroup <- df %>% filter((performancePenalizedRank==5 & study==1)|
                                           (performancePenalizedRank==6 & study==2)) %>%
      select(groupindex, gender) %>% rename(genderBestAfterPenalty = gender)
    
    
    
    df <- merge(df, bestNondiscrimByGroup, by = 'groupindex', all.x = TRUE, all.y = TRUE)
    df <- merge(df, bestWdiscrimByGroup, by = 'groupindex', all.x = TRUE, all.y = TRUE)
    df <- merge(df, secondBestNondiscrimByGroup, by = 'groupindex', all.x = TRUE, all.y = TRUE)
    
    df <- df %>% mutate(selfVote = case_when(gender == "M" & index == indexBest ~ 1,
                                             gender == "M" & index != indexBest ~ 0,
                                             gender == "F" & index != indexBest ~ 0,
                                             gender == "F" & index == indexBest & ((performanceBest - performanceSecondBest) <= selfpnlty) ~ 0,
                                             gender == "F" & index == indexBest & ((performanceBest - performanceSecondBest) > selfpnlty) ~ 1,
                                             TRUE ~ 999)
    )
    
    
    df <- df %>% mutate(meanvotesSelf = case_when(
      study==1 ~ selfVote/(1/5),
      study==2 ~ selfVote/(1/6),
      TRUE ~ 999))
    
    # construct measures
    measuresSelfVote <- df %>% group_by(condition, gender) %>% summarize(meanvotesSelf = mean(meanvotesSelf))
    
    
    out.matrix$FSelfvoteRatioBest1F[i] <- measuresSelfVote$meanvotesSelf[1]
    out.matrix$FSelfvoteRatioBest3F[i] <- measuresSelfVote$meanvotesSelf[3]
    out.matrix$MSelfvoteRatioBest1F[i] <- measuresSelfVote$meanvotesSelf[2]
    out.matrix$MSelfvoteRatioBest3F[i] <- measuresSelfVote$meanvotesSelf[4]
    
    
    
    print(paste(selfpnlty, "-",i))
    
  }
  
  out.matrix$selfdiscrimSD <- selfpnlty
  return(out.matrix)
}



max <- 1.95
series <- seq(0, max, .05)

parallel <- TRUE
reps <- 1000

start <- Sys.time()
if (parallel == F) {
  loopOut <- lapply(series, simulateEfx)
} else {
  library(parallel)
  no_cores <- detectCores()
  cl <- makeCluster(no_cores)
  clusterExport(cl, list("reps", "df.main"))
  loopOut <- parLapply(cl, series, simulateEfx)
  stopCluster(cl)
}
print(paste("runtime:", Sys.time() - start))

loopOut.df <- do.call(rbind.data.frame, loopOut)
loopOut.df$`Performance penalty for women on themselves (SD)` <- loopOut.df$selfdiscrimSD

# capture linear models here on the microdata
femaleMajorityTrend <- lm(data = loopOut.df, FSelfvoteRatioBest3F ~ `Performance penalty for women on themselves (SD)` + I(`Performance penalty for women on themselves (SD)`^2))
maleMajorityTrend <- lm(data = loopOut.df, FSelfvoteRatioBest1F ~ `Performance penalty for women on themselves (SD)` + I(`Performance penalty for women on themselves (SD)`^2))

femaleMajorityTrend_inverted <- lm(data = loopOut.df,  `Performance penalty for women on themselves (SD)` ~ FSelfvoteRatioBest3F + I(FSelfvoteRatioBest3F^2))
maleMajorityTrend_inverted <- lm(data = loopOut.df, `Performance penalty for women on themselves (SD)` ~ FSelfvoteRatioBest1F + I(FSelfvoteRatioBest1F^2))


# now aggregate
loopOut.df.aggregated <- loopOut.df %>% select(-selfdiscrimSD) %>%
  group_by(`Performance penalty for women on themselves (SD)`) %>%
  summarise_all(list(mean, sd), na.rm = TRUE)

names<-names(loopOut.df.aggregated)
names<- gsub("_fn1","", names)
names<- gsub("_fn2","_sd", names)
names(loopOut.df.aggregated) <- names

loopOut.df.aggregated$FSelfvoteRatioBest3F_ciUB <- loopOut.df.aggregated$FSelfvoteRatioBest3F+1.96*loopOut.df.aggregated$FSelfvoteRatioBest3F_sd
loopOut.df.aggregated$FSelfvoteRatioBest3F_ciLB <- loopOut.df.aggregated$FSelfvoteRatioBest3F-1.96*loopOut.df.aggregated$FSelfvoteRatioBest3F_sd

loopOut.df.aggregated$FSelfvoteRatioBest1F_ciUB <- loopOut.df.aggregated$FSelfvoteRatioBest1F+1.96*loopOut.df.aggregated$FSelfvoteRatioBest1F_sd
loopOut.df.aggregated$FSelfvoteRatioBest1F_ciLB <- loopOut.df.aggregated$FSelfvoteRatioBest1F-1.96*loopOut.df.aggregated$FSelfvoteRatioBest1F_sd

save(loopOut.df, loopOut.df.aggregated, femaleMajorityTrend, maleMajorityTrend, femaleMajorityTrend_inverted, maleMajorityTrend_inverted,max,
     file = "simulations_output_1.Rda")